# Load the yearly Twenty20 bowling rating tables (2007-2016).
#
# Every file sits in the same directory and is named
# "<year>twenty20bowlingrating.csv", so the path is assembled in one place
# instead of being spelled out ten times.
ratings_dir <- "D:\\Vishal\\III year\\Data Analytics\\Assignment II\\Player Ratings"

# Read the rating file for a single year and return it as a data frame.
#
# stringsAsFactors = FALSE keeps text columns as character. On R < 4.0 the
# default converts them to factors whose levels differ from year to year,
# which makes bind_rows() emit "binding character and factor vector"
# warnings when the yearly tables are stacked.
read_bowling_rating <- function(year) {
  read.csv(
    file.path(
      ratings_dir,
      paste0(year, "twenty20bowlingrating.csv"),
      fsep = "\\"
    ),
    stringsAsFactors = FALSE
  )
}

# One data frame per year; the individual names are kept because later code
# refers to each of them.
twenty20bowling2007 <- read_bowling_rating(2007)
twenty20bowling2008 <- read_bowling_rating(2008)
twenty20bowling2009 <- read_bowling_rating(2009)
twenty20bowling2010 <- read_bowling_rating(2010)
twenty20bowling2011 <- read_bowling_rating(2011)
twenty20bowling2012 <- read_bowling_rating(2012)
twenty20bowling2013 <- read_bowling_rating(2013)
twenty20bowling2014 <- read_bowling_rating(2014)
twenty20bowling2015 <- read_bowling_rating(2015)
twenty20bowling2016 <- read_bowling_rating(2016)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Stack the ten yearly tables (2007 through 2016, in that order) into a single
# data frame with one row per player per year.
dataTwenty20Bowling <- list(
  twenty20bowling2007,
  twenty20bowling2008,
  twenty20bowling2009,
  twenty20bowling2010,
  twenty20bowling2011,
  twenty20bowling2012,
  twenty20bowling2013,
  twenty20bowling2014,
  twenty20bowling2015,
  twenty20bowling2016
) %>%
  bind_rows()
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
# Print a per-column summary of the stacked table as a quick sanity check.
dataTwenty20Bowling %>% summary()
##      Name               Rating     
##  Length:1000        Min.   :  0.0  
##  Class :character   1st Qu.:255.8  
##  Mode  :character   Median :380.0  
##                     Mean   :370.1  
##                     3rd Qu.:508.2  
##                     Max.   :846.0
library(ggplot2)

# Collapse the stacked rows to one row per player: `avg` is the mean of all
# Rating values recorded under that player's Name.
dataTwenty20Bowling <- summarise(
  group_by(dataTwenty20Bowling, Name),
  avg = mean(Rating)
)

# Fix the RNG state so the k-means fit below is repeatable between runs.
set.seed(20)

# Split the players into 5 clusters using only their average rating.
ballcluster <- kmeans(x = dataTwenty20Bowling[, "avg"], centers = 5)

# Keep the assignments as a factor so plots treat the cluster id as a
# discrete category rather than a continuous number.
ballcluster$cluster <- factor(ballcluster$cluster)

# Scatter plot of every player's average rating, coloured by quality tier.
#
# kmeans() numbers its clusters arbitrarily, so a fixed "cluster 1 = Good,
# cluster 2 = Best, ..." legend is only right by coincidence. The tier of a
# cluster is instead derived from where its centre falls among all centres:
# the cluster with the lowest centre is "Useless", the highest is "Best".
tier_names <- c("Useless", "Average", "Good", "Better", "Best")
tier_colours <- c(
  Useless = "green",
  Average = "black",
  Good    = "blue",
  Better  = "cyan",
  Best    = "red"
)

# Row i of `centers` belongs to cluster i; rank 1 = lowest average rating.
centre_rank <- rank(ballcluster$centers[, 1], ties.method = "first")
stopifnot(length(centre_rank) == length(tier_names))

dataTwenty20Bowling %>%
  mutate(
    # as.character() first so this works whether `cluster` is stored as an
    # integer vector or as a factor with levels "1", "2", ...
    tier = factor(
      tier_names[centre_rank[as.integer(as.character(ballcluster$cluster))]],
      levels = tier_names
    )
  ) %>%
  ggplot(aes(x = Name, y = avg, color = tier)) +
  geom_point(size = 2) +
  scale_color_manual(name = "Tier", values = tier_colours) +
  # Player names are long; rotate them so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Twenty20 Bowling Ratings(2007-2016)")

# Rank the players by average rating: rank 1 is the highest average.
# seq_len(n()) is used instead of 1:nrow(...) so an empty table yields an
# empty rank column rather than c(1, 0).
dat <- dataTwenty20Bowling %>%
  arrange(desc(avg)) %>%
  mutate(rank = seq_len(n()))

# Attach the rank to the per-player table.
#
# left_join() keeps the rows of `dataTwenty20Bowling` in their existing order.
# merge() would re-sort them by Name, and the cluster assignments in
# `ballcluster$cluster` are matched to rows purely by position, so a re-sort
# can pair clusters with the wrong players. The result still carries the
# columns Name, avg.x, avg.y and rank (both avg columns hold the same values),
# but as a tibble rather than a base data.frame.
dataTwenty20Bowling <- left_join(
  dataTwenty20Bowling,
  dat,
  by = "Name",
  suffix = c(".x", ".y")
)

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive scatter plot: one marker per player at their average rating,
# with the player's rank attached as the marker text.
# `color` is a free-standing vector rather than a column, so it is paired with
# the rows of `dataTwenty20Bowling` by position.
p <- dataTwenty20Bowling %>%
  plot_ly(
    x = ~Name,
    y = ~avg.x,
    color = ballcluster$cluster,
    text = ~paste("Rank: ", rank),
    type = "scatter",
    mode = "markers"
  )

# Render the widget.
p